1 Modeltime Forecasting

1.1 options & settings

For scrollable output

options(scipen = 999)

1.3 install packages

# install.packages("modeltime")

1.4 Load libs

library(tidyverse)
library(tidymodels)
library(modeltime)
library(timetk)
library(lubridate)

1.5 Load data

read.csv("day.csv") %>%  head()
head(bike_sharing_daily)
bike_transactions_tbl <- bike_sharing_daily %>% 
                          select(dteday, cnt) %>% 
                          set_names(c("date","value"))

head(bike_transactions_tbl)

1.5.1 Visualizing loaded data

bike_transactions_tbl %>% 
  timetk::plot_time_series(date, value)


#  timetk::plot_time_series(date, value, .interactive = FALSE)

1.6 Train Test

  • Setting assess = "3 months" tells the function to use the last 3-months of data as the testing set.
  • Setting cumulative = TRUE tells the sampling to use all of the prior data as the training set.
splits <- bike_transactions_tbl %>% 
            time_series_split(assess = "3 months", cumulative = TRUE)

1.6.1 Visualizing Train test split


splits %>% 
  timetk::tk_time_series_cv_plan() %>% 
  timetk::plot_time_series_cv_plan(date, value)

1.7 Modeling

1.7.1 Automatic Models

1.7.1.1 Auto Arima

model_fit_arima <- modeltime::arima_reg() %>% 
                    parsnip::set_engine("auto_arima") %>% 
                    parsnip::fit(value ~ date, training(splits))

model_fit_arima
parsnip model object

Fit time:  5.8s 
Series: outcome 
ARIMA(0,1,3) with drift 

Coefficients:
          ma1      ma2      ma3   drift
      -0.6106  -0.1868  -0.0673  9.3169
s.e.   0.0396   0.0466   0.0398  4.6225

sigma^2 estimated as 730568:  log likelihood=-5227.22
AIC=10464.44   AICc=10464.53   BIC=10486.74

1.7.1.2 Prophet

model_fit_prophet <- prophet_reg() %>%
                      set_engine("prophet", yearly.seasonality = TRUE) %>%
                      fit(value ~ date, training(splits))

model_fit_prophet
parsnip model object

Fit time:  1.4s 
PROPHET Model
- growth: 'linear'
- n.changepoints: 25
- changepoint.range: 0.8
- yearly.seasonality: 'auto'
- weekly.seasonality: 'auto'
- daily.seasonality: 'auto'
- seasonality.mode: 'additive'
- changepoint.prior.scale: 0.05
- seasonality.prior.scale: 10
- holidays.prior.scale: 10
- logistic_cap: NULL
- logistic_floor: NULL
- extra_regressors: 0

1.7.2 ML Models

Machine learning models are more complex than the automated models. This complexity typically requires a workflow (sometimes called a pipeline in other languages). The general process goes like this:

  • Create Preprocessing Recipe

  • Create Model Specifications

  • Use Workflow to combine Model Spec and Preprocessing, and Fit Model

1.7.2.1 Preprocessing Recipe

First, I’ll create a preprocessing recipe using recipe() and adding time series steps. The process uses the “date” column to create 45 new features that I’d like to model. These include time-series signature features and fourier series.

recipe_spec <- recipe(value ~ date, training(splits)) %>% 
                step_timeseries_signature(date) %>% 
                step_rm(contains("am.pm"), contains("hour"), contains("minute"),
                        contains("second"), contains("xts")) %>% 
                step_fourier(date, period = 365, K = 5) %>% 
                step_dummy(all_nominal())

recipe_spec %>% 
  prep() %>% 
  juice()

With a recipe in-hand, we can set up our machine learning pipelines.

1.7.2.2 Elastic Net / glmnet

model_spec_glmnet <- linear_reg(penalty = 0.01, mixture = 0.5) %>% 
                      set_engine("glmnet")

model_spec_glmnet
Linear Regression Model Specification (regression)

Main Arguments:
  penalty = 0.01
  mixture = 0.5

Computational engine: glmnet 
workflow_fit_glmnet <- workflow() %>% 
  add_model(model_spec_glmnet) %>% 
  add_recipe(recipe_spec %>% step_rm(date)) %>% 
  fit(training(splits))

workflow_fit_glmnet
== Workflow [trained] ==========================================================
Preprocessor: Recipe
Model: linear_reg()

-- Preprocessor ----------------------------------------------------------------
5 Recipe Steps

* step_timeseries_signature()
* step_rm()
* step_fourier()
* step_dummy()
* step_rm()

-- Model -----------------------------------------------------------------------

Call:  glmnet::glmnet(x = as.matrix(x), y = y, family = "gaussian",      alpha = ~0.5) 

   Df  %Dev  Lambda
1   0  0.00 2856.00
2   1  5.71 2602.00
3   2 11.22 2371.00
4   2 19.05 2160.00
5   3 26.52 1968.00
6   3 33.31 1793.00
7   3 39.30 1634.00
8   4 44.58 1489.00
9   4 49.32 1357.00
10  5 53.41 1236.00
11  5 56.95 1126.00
12  5 59.98 1026.00
13  5 62.58  935.10
14  5 64.81  852.00
15  5 66.71  776.30
16  5 68.34  707.30
17  5 69.72  644.50
18  5 70.91  587.20
19  5 71.91  535.10
20  5 72.77  487.50
21  6 73.52  444.20
22  6 74.19  404.80
23  6 74.77  368.80
24  6 75.25  336.00
25  6 75.65  306.20
26  6 75.99  279.00
27  7 76.35  254.20
28  7 76.68  231.60
29  8 76.99  211.00
30  8 77.26  192.30
31 10 77.51  175.20
32 10 77.74  159.60
33 10 77.93  145.50
34 10 78.09  132.50
35 11 78.24  120.80
36 12 78.36  110.00
37 13 78.47  100.30
38 14 78.58   91.36
39 14 78.66   83.24
40 15 78.75   75.85
41 15 78.81   69.11
42 15 78.87   62.97
43 17 78.93   57.37
44 18 78.99   52.28
45 18 79.05   47.63
46 18 79.09   43.40

...
and 42 more lines.

1.7.2.3 RandomForest

model_spec_rf <- rand_forest(trees = 500, min_n = 50) %>% 
                  set_engine("randomForest")

workflow_fit_rf <- workflow() %>% 
  add_model(model_spec_rf) %>% 
  add_recipe(recipe_spec %>% step_rm(date)) %>%
  fit(training(splits))

workflow_fit_rf
== Workflow [trained] ==========================================================
Preprocessor: Recipe
Model: rand_forest()

-- Preprocessor ----------------------------------------------------------------
5 Recipe Steps

* step_timeseries_signature()
* step_rm()
* step_fourier()
* step_dummy()
* step_rm()

-- Model -----------------------------------------------------------------------

Call:
 randomForest(x = as.data.frame(x), y = y, ntree = ~500, nodesize = ~50) 
               Type of random forest: regression
                     Number of trees: 500
No. of variables tried at each split: 15

          Mean of squared residuals: 701813.6
                    % Var explained: 80.94

1.7.3 New Hybrid Models

1.7.3.1 Prophet Boost

The Prophet Boost algorithm combines Prophet with XGBoost to get the best of both worlds (i.e. Prophet Automation + Machine Learning). The algorithm works by:

  1. First modeling the univariate series using Prophet

  2. Using regressors supplied via the preprocessing recipe (remember our recipe generated 45 new features), and regressing the Prophet Residuals with the XGBoost model

We can set the model up using a workflow just like with the machine learning algorithms.

model_spec_prophet_boost <- prophet_boost() %>% 
                              set_engine("prophet_xgboost", yearly.seasonality = TRUE)

workflow_fit_prophet_boost <- workflow() %>% 
  add_model(model_spec_prophet_boost) %>% 
  add_recipe(recipe_spec) %>% 
  fit(training(splits))

1.8 Modeltime Table

model_table <- modeltime_table(
                  model_fit_arima,
                  model_fit_prophet,
                  workflow_fit_glmnet,
                  workflow_fit_rf,
                  workflow_fit_prophet_boost
                  )

model_table
# Modeltime Table

1.9 Calibration

calibration_table <- model_table %>% 
  modeltime_calibrate(testing(splits))

calibration_table
# Modeltime Table

1.10 Forecasting

library(plotly)
calibration_table %>% 
  modeltime_forecast(actual_data = bike_transactions_tbl) %>% 
  plot_modeltime_forecast() %>% 
  layout(legend = list(x = -0.5, y = 1.0))


  # for more legend position options check out https://plotly.com/r/legend/
P_results1 <- calibration_table %>% 
  modeltime_forecast(actual_data = bike_transactions_tbl) %>% 
  plot_modeltime_forecast() %>% 
  layout(legend = list(orientation = "h", y = -0.2))
Using '.calibration_data' to forecast.
P_results1
library(htmlwidgetsl)
Error in library(htmlwidgetsl) : 
  there is no package called ‘htmlwidgetsl’
htmlwidgets::saveWidget(P_results1, "forecasting_results.html")

1.11 Accuracy (Testing set)

calibration_table %>% 
  modeltime_accuracy() %>% 
  table_modeltime_accuracy(.interactive = FALSE)
Accuracy Table
.model_id .model_desc .type mae mape mase smape rmse rsq
1 ARIMA(0,1,3) WITH DRIFT Test 2540.11 474.89 2.74 46.00 3188.09 0.39
2 PROPHET Test 3052.52 513.91 3.30 51.51 3707.56 0.29
3 GLMNET Test 1243.53 332.96 1.34 29.20 1693.58 0.48
4 RANDOMFOREST Test 1335.74 334.77 1.44 30.60 1851.42 0.48
5 PROPHET W/ XGBOOST ERRORS Test 2528.39 412.40 2.73 46.02 3139.75 0.07

1.12 Refit and Forecast Forward

calibration_table %>% 
  
  # Remove arima model with low accuracy
  filter(.model_id !=1) %>% 
  
  # Refit & Forecast Forward
  modeltime_refit(bike_transactions_tbl) %>% 
  modeltime_forecast(h = "12 months", actual_data = bike_transactions_tbl) %>%
  plot_modeltime_forecast() %>% 
  layout(legend = list(orientation = "h", y = -0.2))
---
title: "Modeltime Forecasting r-bloggers"
output: 
  html_notebook:
    highlight: tango
    df_print: paged
    toc: true
    toc_float: 
      collapsed: false
      smooth_scroll: false
    number_sections: true
    toc_depth: 6
---


# Modeltime Forecasting

## options & settings

For scrollable output

```{css, echo=FALSE}
.scroll-100 {
  max-height: 100px;
  overflow-y: auto;
  background-color: inherit;
}


h1, #TOC>ul>li {
  color: #B64D3A;
}

h2, #TOC>ul>ul>li {
  color: #000000;
}

h3, #TOC>ul>ul>ul>li {
  color: #643cb2;
}

h4, #TOC>ul>ul>ul>ul>li {
  color: #ae0058;
}

h5, #TOC>ul>ul>ul>ul>ul>li {
  color: #ffa447;
}

h6, #TOC>ul>ul>ul>ul>ul>ul>li {
  color: #DAE3D9;
}

```


```{r setup, include=FALSE}
# Global chunk defaults: echo code, suppress messages/warnings, render
# high-dpi full-width figures, and cap the height of text output.
knitr::opts_chunk$set(
  echo        = TRUE,
  message     = FALSE,
  warning     = FALSE,
  dpi         = 300,
  attr.output = 'style="max-height: 300px;"',
  out.width   = "100%",
  fig.width   = 12
)
```


```{r}
# Heavily penalize scientific notation so large counts print as plain numbers.
options(scipen = 999)
```


## from

https://www.business-science.io/code-tools/2020/06/29/introducing-modeltime.html
https://www.r-bloggers.com/2020/06/introducing-modeltime-tidy-time-series-forecasting-using-tidymodels/


**bike sharing data from**

Available within library by name `bike_sharing_daily` or download from below link

https://archive.ics.uci.edu/ml/machine-learning-databases/00275/

**Reference of some EDA on Bike sharing data**

https://www.rpubs.com/shayini/bike_sharing
http://rstudio-pubs-static.s3.amazonaws.com/158595_1f520fd8d8e34a5ab3a127376f2f6169.html


## install packages

```{r }
# install.packages("modeltime")
```

## Load libs

```{r }
library(tidyverse)
library(tidymodels)
library(modeltime)
library(timetk)
library(lubridate)
```

## Load data

```{r}
# Preview the raw UCI CSV only if the optional download is present locally.
# The same data ships with {timetk} as `bike_sharing_daily`, which the rest
# of the analysis uses, so a missing file should not abort knitting.
if (file.exists("day.csv")) {
  head(read.csv("day.csv"))
}
```


```{r}
# Peek at the bike-sharing dataset bundled with {timetk}.
head(bike_sharing_daily)
```


```{r}
# Keep only the timestamp and daily rental count, renamed to the
# (date, value) pair that timetk/modeltime functions expect.
bike_transactions_tbl <- bike_sharing_daily %>%
  select(date = dteday, value = cnt)

head(bike_transactions_tbl)
```


### Visualizing loaded data

```{r}
# Interactive (plotly) time-series plot of the full daily rental history.
bike_transactions_tbl %>% 
  timetk::plot_time_series(date, value)

#  timetk::plot_time_series(date, value, .interactive = FALSE)
```

## Train Test


  - Setting `assess = "3 months"` tells the function to use the last 3-months of data as the testing set.
  - Setting `cumulative = TRUE` tells the sampling to use all of the prior data as the training set.


```{r}
# Hold out the final 3 months as the test set; cumulative = TRUE makes all
# earlier observations the training set.
splits <- bike_transactions_tbl %>% 
            time_series_split(assess = "3 months", cumulative = TRUE)
```

### Visualizing Train test split

```{r}

# Convert the resample split into a cross-validation plan table, then
# visualize the training vs. testing regions of the series.
timetk::plot_time_series_cv_plan(
  timetk::tk_time_series_cv_plan(splits),
  date, value
)
```

## Modeling

### Automatic Models

#### Auto Arima

```{r}
# Auto-ARIMA: the "auto_arima" engine searches the (p, d, q) orders
# automatically when fitting on the training split.
arima_spec <- modeltime::arima_reg() %>%
  parsnip::set_engine("auto_arima")

model_fit_arima <- parsnip::fit(arima_spec, value ~ date, training(splits))

model_fit_arima
```

#### Prophet

```{r}
# Prophet model; `yearly.seasonality = TRUE` is forwarded to the prophet
# engine to force an explicit yearly seasonal component instead of 'auto'.
model_fit_prophet <- prophet_reg() %>%
                      set_engine("prophet", yearly.seasonality = TRUE) %>%
                      fit(value ~ date, training(splits))

model_fit_prophet
```

### ML Models


Machine learning models are more complex than the automated models. This complexity typically requires a **workflow** (sometimes called a pipeline in other languages). The general process goes like this:

  - **Create Preprocessing Recipe**
  
  - **Create Model Specifications**
  
  - **Use Workflow to combine Model Spec and Preprocessing, and Fit Model**


#### Preprocessing Recipe

First, I’ll create a preprocessing recipe using `recipe()` and adding time series steps. The process uses the `“date”` column to create `45 new features` that I’d like to model. These include time-series signature features and fourier series.

```{r}
# Feature-engineering recipe driven entirely by the `date` column:
#  * step_timeseries_signature(): expands date into calendar features
#    (year, month, week, day-of-week, ...)
#  * step_rm(): drops sub-daily (am/pm, hour, minute, second) and xts
#    features that carry no signal for daily data
#  * step_fourier(): adds K = 5 pairs of annual (period = 365) Fourier terms
#  * step_dummy(): one-hot encodes the remaining nominal features
recipe_spec <- recipe(value ~ date, training(splits)) %>% 
                step_timeseries_signature(date) %>% 
                step_rm(contains("am.pm"), contains("hour"), contains("minute"),
                        contains("second"), contains("xts")) %>% 
                step_fourier(date, period = 365, K = 5) %>% 
                step_dummy(all_nominal())

# Preview the engineered design matrix (prep() estimates, juice() extracts).
recipe_spec %>% 
  prep() %>% 
  juice()
```


With a recipe in-hand, we can set up our machine learning pipelines.


#### Elastic Net / glmnet

```{r}
# Elastic-net spec: penalty is the regularization amount (lambda);
# mixture = 0.5 is an even lasso/ridge blend.
model_spec_glmnet <- linear_reg(penalty = 0.01, mixture = 0.5) %>% 
                      set_engine("glmnet")

model_spec_glmnet
```

```{r}
# glmnet needs an all-numeric design matrix, so the raw `date` column is
# removed here; the signature/Fourier features derived from it remain.
workflow_fit_glmnet <- workflow() %>% 
  add_model(model_spec_glmnet) %>% 
  add_recipe(recipe_spec %>% step_rm(date)) %>% 
  fit(training(splits))

workflow_fit_glmnet
```

#### RandomForest

```{r}
# Random forest: 500 trees, minimum terminal-node size of 50.
model_spec_rf <- rand_forest(trees = 500, min_n = 50) %>% 
                  set_engine("randomForest")

# As with glmnet, drop the raw `date` column before fitting; the recipe's
# engineered date features carry the signal.
workflow_fit_rf <- workflow() %>% 
  add_model(model_spec_rf) %>% 
  add_recipe(recipe_spec %>% step_rm(date)) %>%
  fit(training(splits))

workflow_fit_rf
```

### New Hybrid Models

#### Prophet Boost

The Prophet Boost algorithm combines Prophet with XGBoost to get the best of both worlds (i.e. Prophet Automation + Machine Learning). The algorithm works by:

  1. First modeling the univariate series using Prophet
  
  2. Using regressors supplied via the preprocessing recipe (remember our recipe generated 45 new features), and regressing the Prophet Residuals with the XGBoost model

We can set the model up using a workflow just like with the machine learning algorithms.

```{r}
# Prophet Boost: Prophet models the univariate series, then XGBoost
# regresses the Prophet residuals on the recipe's calendar/Fourier features.
# `yearly.seasonality = TRUE` is passed through to the prophet component.
model_spec_prophet_boost <- prophet_boost() %>% 
                              set_engine("prophet_xgboost", yearly.seasonality = TRUE)

# Keep the full recipe (including `date`): prophet consumes the date column
# while xgboost uses the derived regressors.
workflow_fit_prophet_boost <- workflow() %>% 
  add_model(model_spec_prophet_boost) %>% 
  add_recipe(recipe_spec) %>% 
  fit(training(splits))

# Display the fitted workflow, for consistency with the glmnet/rf chunks.
workflow_fit_prophet_boost


```


## Modeltime Table

```{r}
# Register all fitted models/workflows in one modeltime table so they can
# be calibrated, forecast, and compared together downstream.
model_table <- modeltime_table(
                  model_fit_arima,
                  model_fit_prophet,
                  workflow_fit_glmnet,
                  workflow_fit_rf,
                  workflow_fit_prophet_boost
                  )

model_table
```


## Calibration

```{r}
# Calibrate on the hold-out test set: computes out-of-sample predictions and
# residuals that power the accuracy metrics and forecast confidence intervals.
calibration_table <- model_table %>% 
  modeltime_calibrate(testing(splits))

calibration_table
```

## Forecasting

```{r}
library(plotly)
```


```{r}
# Forecast over the test window and overlay all models on the actual series;
# plotly::layout() repositions the legend of the returned widget.
calibration_table %>% 
  modeltime_forecast(actual_data = bike_transactions_tbl) %>% 
  plot_modeltime_forecast() %>% 
  layout(legend = list(x = -0.5, y = 1.0))

  # for more legend position options check out https://plotly.com/r/legend/

```


```{r}
# Same forecast plot with a horizontal legend beneath the chart; stored in
# `P_results1` so it can be exported to HTML in a later chunk.
P_results1 <- calibration_table %>% 
  modeltime_forecast(actual_data = bike_transactions_tbl) %>% 
  plot_modeltime_forecast() %>% 
  layout(legend = list(orientation = "h", y = -0.2))

P_results1
```

```{r}
library(htmlwidgets)
```

```{r}
# Export the interactive forecast widget as a standalone HTML file.
htmlwidgets::saveWidget(P_results1, "forecasting_results.html")
```


## Accuracy (Testing set)

```{r}
# Test-set accuracy metrics (MAE, MAPE, MASE, SMAPE, RMSE, RSQ) per model,
# rendered as a static (non-interactive) table.
calibration_table %>% 
  modeltime_accuracy() %>% 
  table_modeltime_accuracy(.interactive = FALSE)
```


## Refit and Forecast Forward

```{r, out.width= "100%"}
# Drop the underperforming ARIMA model (.model_id 1), refit the remaining
# models on the complete dataset, then forecast 12 months past the data.
refit_tbl <- calibration_table %>%
  filter(.model_id != 1) %>%
  modeltime_refit(bike_transactions_tbl)

refit_tbl %>%
  modeltime_forecast(h = "12 months", actual_data = bike_transactions_tbl) %>%
  plot_modeltime_forecast() %>%
  layout(legend = list(orientation = "h", y = -0.2))
```
























